*Main data pathway globals, set directory to clean data folder
global results "S:\Project\DemoSos2\common\felles\JR_RG\DrVA\ResultsRev\"
global cleandata "S:\Project\DemoSos2\common\felles\JR_RG\DrVA\CleanData\"
cd "$cleandata"

cap log close
clear all
set more off
set matsize 11000
set maxvar 120000


********************************************************************************************
* multiple measures of VA
********************************************************************************************
*log using "ValueAdded.log", replace

*Load data
use  clean_patientlevel_file_rev3.dta, clear
keep if yr_str_exog_swap >= 2005 & yr_str_exog_swap<=2014
keep if str_exogGPIDnew!=. 


* largest mobility group 
a2group, individual(str_exogGPIDnew) unit(str_exogGPIDprev) groupvar(pair)
bys pair: ge size_pair = _N
tab pair
drop if size_pair < 10 ///largest mobility group has 99%

ge death2 = mortality_2year
for num 25 35 45 55: ge death2_X = mortality_2year if str_exog_age>=X
for var heart cancer ext resp mental accident suicide homicide: ge X2y_55 = X_2y if str_exog_age>=55
for var npr2 npr_noer2 npr_er2 npr_in2 npr_out2 charlindex2 sl_days2: ge X55 = X if str_exog_age>=55

keep if str_exog_age>=55

label var death2_55 	"2-years mortality for 55+"
label var bweight_2y 	"Birthweight"
label var heart2y_55 	"Mortality: heart conditions"
label var cancer2y_55 	"Mortality: cancer"
label var ext2y_55 		"Mortality: external conditions"
label var resp2y_55 	"Mortality: respiratory conditions"
label var mental2y_55 	"Mortality: mental health conditions"
label var accident2y_55	"Mortality: accident conditions"
label var sl_days255 	"Nb sick leave days"
label var npr255 		"Nb of hospitalizations"
label var npr_noer255 	"Nb of hospitalizations - not ER"
label var charlindex255 "Charlson Index"

/* estimate the FE */
global index "death2_55  npr255 npr_noer255 npr_er255 npr_in255 npr_out255 sl_days255 lab_income2 dwoverfor2" 
global controls "i.yr_str_exog_swap i.str_exog_age male" 


foreach var in  $index {
	qui reghdfe `var' $controls, a(_fe_`var'=str_exogGPIDnew fe_old_`var'=str_exogGPIDprev) res(res_`var')  

	predict x_`var' if e(sample), xb
	ge fe2_`var' = _fe_`var' + res_`var' if e(sample) 
	bys str_exogGPIDnew: egen fe_`var' = mean(fe2_`var') 
	ge n`var' = e(N) if e(sample)	
	
}


*---------------------------------------------------------*
* OUTPUT INFO
*---------------------------------------------------------*
foreach x in death2_55 /*$index */ {
	bys str_exogGPIDnew: egen var_`x' = var(fe2_`x') 			// variance of fixed effect within doc
	egen var_across_`x' = var(fe_`x') 						// var of all average fixed effects (across doc)
	bys str_exogGPIDnew:  egen count2_`x' = count(fe_`x') 	// count number of fams per new doc
	bys str_exogGPIDnew:  egen count3_`x' = mean(count2_`x')
	egen shrink_a_`x' = mean(var_`x'/count3_`x') 				//  this is the variance of each doc's FE divided by the number of indiv a doc sees
	ge shrink_b_`x' = var_across_`x' - (var_`x'/count2_`x')
	ge shrinkage_`x'  = shrink_b_`x' / (shrink_b_`x' + shrink_a_`x') 
	egen shrink_coef_`x' = mean(shrinkage_`x') 					//  this is the shrinkage coefficient
	ge afe_`x' = (sqrt(shrink_coef_`x'))*fe_`x'

}


bys str_exogGPIDnew: gen count_doc = _n
twoway 	(kdensity afe_death2_55 if count_doc==1, lcolor(black) lpattern(dash) graphregion(color(white)) bgcolor(white) xtitle(Value Added)) ///
		(kdensity fe_death2_55 if count_doc==1, lcolor(black) lpattern(dot)), legend(order(1 "Adjusted VA" 2 "Non-Adjusted VA")) 
		graph export  "$results\fig1.eps",  replace


*std
foreach var in afe_death2_55  afe_npr255 afe_npr_noer255 afe_npr_er255 afe_npr_in255 afe_npr_out255 afe_sl_days255 {
	egen st_`var' = std(-`var')
}

egen st_afe_lab_income2 = std(afe_lab_income2)
egen st_afe_dwoverfor2 = std(afe_dwoverfor2)


foreach x in $index  {
	kdensity st_afe_`x' if count_doc==1,  generate(newvar_x newvar_d) 
	line newvar_d newvar_x, lcolor(blue) lpattern(solid) lwidth(thick)  graphregion(color(white)) xtitle(Standardized Value) ytitle(Density)
	graph export  "$results\afe_`x'.pdf",  replace
	drop newvar_d newvar_x
}


/* TABLES */
global fe "fe_death2_55 fe_npr255 fe_npr_noer255 fe_npr_er255 fe_npr_in255 fe_npr_out255 fe_sl_days255 fe_lab_income2 fe_dwoverfor2 "
global sfe "st_afe_death2_55 st_afe_npr255 st_afe_npr_noer255 st_afe_npr_er255 st_afe_npr_in255 st_afe_npr_out255 st_afe_sl_days255 st_afe_lab_income2 st_afe_dwoverfor2"

label var fe_death2_55 		"2-years mortality for 55+"
label var fe_sl_days255 	"Sick Leave days for 55+"
label var fe_npr255			"Hospitalizations for 55+"
label var fe_npr_noer255	"Non acute hospitalizations for 55+"
label var fe_npr_er255		"Acute hospitalizations for 55+"


label var st_afe_death2_55 		"2-years mortality for 55+"
label var st_afe_sl_days255 	"Sick Leave days for 55+"
label var st_afe_npr_noer255	"Non acute hospitalizations for 55+"

* basic
eststo s1: estpost tabstat $fe, s(n p10 p25 p50 p75 p90 mean sd) columns(statistics)  
esttab s1 using "$results/fe_unadjusted.tex", replace fragment ///
	cells("p10(fmt(3)) p25(fmt(3)) p50(fmt(3)) p75(fmt(3)) p90(fmt(3)) mean(fmt(3)) sd(fmt(3))") noobs nomtitle ///
	varlabels (`e(labels)')  varwidth(20) booktabs label ///
	collabels("P10" "P25" "P50" "P75" "P90" "Mean" "SD")

* corrected
eststo s2: estpost tabstat $sfe, s(n p10 p25 p50 p75 p90 mean sd) columns(statistics)  
esttab s2 using "$results/fe_adjusted.tex", replace fragment ///
	cells("p10(fmt(3)) p25(fmt(3)) p50(fmt(3)) p75(fmt(3)) p90(fmt(3)) mean(fmt(3)) sd(fmt(3))") noobs nomtitle ///
	varlabels (`e(labels)')  varwidth(20) booktabs label ///
	collabels("P10" "P25" "P50" "P75" "P90" "Mean" "SD")
	
keep if count_doc==1
keep str_exogGPIDnew fe_* afe_* st_*
duplicates drop
save "$results/fe_addon2.dta", replace

*---------------------------------------------------------*
*Correlation between diff types of fe: Table A4
*---------------------------------------------------------*

use  "$results/fe_addon2.dta", clear


replace st_afe_death2_55 = -st_afe_death2_55
foreach var in st_afe_npr255 st_afe_npr_noer255 st_afe_npr_er255 st_afe_npr_in255 st_afe_npr_out255 st_afe_sl_days255  st_afe_lab_income2 {
	eststo `var': reg `var' st_afe_death2_55, robust
}
esttab st_afe_npr255 st_afe_npr_noer255 st_afe_npr_er255 st_afe_npr_in255 st_afe_npr_out255 st_afe_sl_days255  st_afe_lab_income2 using "$results/corr_fe.tex",  replace keep(st_afe_death2_55) ///
	b(3) se(3) star(* 0.1 ** 0.05 *** 0.01) stats(emp mean N, fmt(%9.3g)) 


********************************************************************************************
* Leave-out VA for mortality
********************************************************************************************
use  clean_patientlevel_file_rev3.dta, clear
keep if yr_str_exog_swap >= 2005 & yr_str_exog_swap<=2014
keep if str_exogGPIDnew!=. 


* largest mobility group 
a2group, individual(str_exogGPIDnew) unit(str_exogGPIDprev) groupvar(pair)
bys pair: ge size_pair = _N
tab pair
drop if size_pair < 10 ///largest mobility group has 99%

keep if str_exog_age >=55

ge death2 = mortality_2year
for num 25 35 45 55: ge death2_X = mortality_2year if str_exog_age>=X

/* estimate the FE */
global index "death2_55"
global controls "i.yr_str_exog_swap i.str_exog_age male" 

foreach var in  $index {
	reghdfe `var'  $controls, a(_fe_`var'=str_exogGPIDnew str_exogGPIDprev) res(res_`var')  

	ge fe2_`var' = _fe_`var' + res_`var' if e(sample) 
	bys str_exogGPIDnew: egen fe_`var' = mean(fe2_`var') 
	bys str_exogGPIDnew: egen s = sum(fe2_`var')
	ge d = 1 if e(sample)
	bys str_exogGPIDnew: egen n = sum(d) 
	ge n`var' = e(N) if e(sample)	
	ge lofe_`var' = (s-fe2_`var')/(n-1)
	drop _fe_`var' n d
	
}


*---------------------------------------------------------*
* OUTPUT INFO
*---------------------------------------------------------*
* patient counts
ge d = death2_55!=.
bys str_exogGPIDnew: egen count_pat = sum(d)
drop d

bys str_exogGPIDnew: gen count_doc = _n

foreach x in $index  {
	bys str_exogGPIDnew: egen var_`x' = var(fe2_`x') 			// variance of fixed effect within doc
	egen var_across_`x' = var(lofe_`x') 						// var of all average fixed effects (across doc)
	bys str_exogGPIDnew:  egen count2_`x' = count(lofe_`x') 	// count number of fams per new doc
	bys str_exogGPIDnew:  egen count3_`x' = mean(count2_`x')
	egen shrink_a_`x' = mean(var_`x'/count3_`x') 				//  this is the variance of each doc's FE divided by the number of indiv a doc sees
	ge shrink_b_`x' = var_across_`x' - (var_`x'/count2_`x')
	ge shrinkage_`x'  = shrink_b_`x' / (shrink_b_`x' + shrink_a_`x') 
	egen shrink_coef_`x' = mean(shrinkage_`x') 					//  this is the shrinkage coefficient
	ge afe_`x' = (sqrt(shrink_coef_`x'))*lofe_`x'

}


su shrink_coef_death2_55 if count_doc==1
* nb of GPs with shrinkage over 90%
ge d = shrinkage_death2_55 >.9 if shrinkage_death2_55!=. & count_doc==1
su d
drop d

/* TABLES */
global fe "lofe_death2_55 fe_death2_55 afe_death2_55"

label var fe_death2_55 		"2-years mortality for 55+: baseline"
label var lofe_death2_55 	"2-years mortality for 55+: leave-out"	
label var afe_death2_55 	"2-years mortality for 55+: leave-out adjusted"

eststo s1: estpost tabstat $fe, s(p10 p25 p50 p75 p90 mean sd) columns(statistics)  
esttab s1 using "$results/fe_lo.tex", replace fragment ///
	cells("p10(fmt(3)) p25(fmt(3)) p50(fmt(3)) p75(fmt(3)) p90(fmt(3)) mean(fmt(3)) sd(fmt(3))") noobs nomtitle ///
	varlabels (`e(labels)')  varwidth(20) booktabs label ///
	collabels("P10" "P25" "P50" "P75" "P90" "Mean" "SD")
	
keep lopenr str_exogGPIDnew lofe_death2_55 fe_death2_55 afe_death2_55 count_pat
duplicates drop
save "$results/fe_addon_lo2.dta", replace



use "$results/fe_addon_lo2.dta", clear
collapse fe_death2_55 count_pat, by(str_exogGPIDnew)
sort count_pat
lab var count_pat  "Nb of Patients"
line count_pat fe_death2_55, xline(10 25 50 100) lcolor(black) graphregion(color(white)) bgcolor(white) ytitle(Number of Incoming Patients) xtitle(New GP FE)
graph export  "$results\npat_va.pdf",  replace


reg  fe_death2_55 count_pat
local coef = string(_b[count_pat],"%9.4f")
local se = string(_se[count_pat],"%9.4f")

* Figure A7
ge d = 1 if count_pat <= 25
local j = 25
local i = 1
while `j' <= 600 {
	replace d = `i'+1 if count_pat> `j' & count_pat <= `j' + 25
	local i = `i'+ 1
	local j = `j'+25
}
collapse fe_death2_55 count_pat (sd) sdva=fe_death2_55, by(d)

twoway (scatter fe_death2_55 count_pat, xline(10 25 50 100) mcolor(navy)) (lfit fe_death2_55 count_pat, lcolor(red)), graphregion(color(white)) bgcolor(white) xtitle(Number of Incoming Patients) ytitle(New GP FE) xline(0) legend(off) note("Coefficient=`coef'" "(`se')", position(5) ring(0) size(medsmall))
graph export  "$results\counts_va2.pdf",  replace

	
